In [69]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

%matplotlib inline
In [70]:
data_cam_df = pd.read_csv('xcms_pigs_camera.csv', sep = ',', index_col=0)
In [71]:
service_cols = ["mz", "mzmin", "mzmax", "rt", "rtmin", "rtmax", ".", "npeaks", "isotopes", "adduct", "pcgroup"]
assert len(set(service_cols) - set(data_cam_df.columns))==0
samples_cols = list(set(data_cam_df.columns) - set(service_cols))
In [72]:
data_cam_df[data_cam_df==0]=np.nan
data_cam_df[samples_cols] = np.log(data_cam_df[samples_cols])

standards

TAG

In [73]:
new_mz_palm = np.abs((data_cam_df['mz'] - 811.765)) / 811.765 
new_mz_palm = new_mz_palm * 1000000
sunf_ppm_palm = data_cam_df[new_mz_palm<13]
In [74]:
sunf_ppm_palm.head()
sunf_ppm_palm.index
Out[74]:
Int64Index([2011], dtype='int64')
In [75]:
RO=pd.Series([i.split('_')[3][1:] if 'S' in i else i for i in data_cam_df.columns],index=data_cam_df.columns)
In [76]:
for i in RO.index:
    try:
        RO[i]=int(RO[i])
    except:
        RO[i]=np.nan
In [77]:
RO=RO.dropna()

LPS

In [78]:
plt.scatter(RO,data_cam_df.loc[1,RO.index], color = 'rebeccapurple')
Out[78]:
<matplotlib.collections.PathCollection at 0x7f9774bb2320>
In [ ]:
 

filtration by rt

rt(sec) -> rt(min)

In [79]:
rt_in_min = data_cam_df['rt']/60
data_cam_df['rt'] = rt_in_min
data_cam_filt_rt_df = data_cam_df[(data_cam_df['rt'] > 0.6) & (data_cam_df['rt'] < 19)]
data_cam_filt_rt_df.head(4)
Out[79]:
mz mzmin mzmax rt rtmin rtmax npeaks . X171206_pigs_scat1_3_15_pos_1.100 X171207_pigs_BF1_10_1.50_pos ... X171208_pigs_scat2_15_1.100_pos X171208_pigs_scat2_18_1.100_pos X171208_pigs_scat2_2_1.100_pos X171208_pigs_scat2_3_1.100_pos X171208_pigs_scat2_6_1.100_pos X171208_pigs_scat2_8_1.100_pos X171208_pigs_scat2_9_1.100_pos isotopes adduct pcgroup
1 130.158684 130.158150 130.159207 0.660983 38.049 127.265 79 75 11.640429 12.005639 ... 11.927535 11.867717 12.015332 12.048855 11.646204 12.103320 11.972464 NaN NaN 105
2 133.100423 133.099922 133.101013 3.391133 202.231 204.805 69 69 10.248525 11.050215 ... 10.273095 9.808182 10.138165 9.222282 10.018860 10.075920 9.181783 NaN [M+3H-C6H10O4]3+ 542.333 37
3 147.064214 147.063614 147.064651 0.970192 56.181 61.209 74 73 11.269131 8.689696 ... 9.952731 9.736448 10.247801 9.891781 10.268355 9.772705 9.975347 NaN [M+3H-CH2]3+ 452.189 [M+2H-C6H8O6]2+ 468.159 17
4 149.022196 149.021515 149.022699 4.672275 268.144 286.492 140 75 14.002570 13.957953 ... 14.030105 14.237696 14.040697 14.005739 14.212340 14.243009 14.031754 NaN NaN 61

4 rows × 88 columns

In [80]:
plt.gcf().set_size_inches(24,19)
plt.scatter(data_cam_filt_rt_df['rt'], data_cam_filt_rt_df['mz'], color = 'rebeccapurple', s = 50)
Out[80]:
<matplotlib.collections.PathCollection at 0x7f9774b87908>
In [81]:
del_isotopes = data_cam_filt_rt_df['isotopes'].str.match(r'\[\d+\]\[M\+\d+\]\+').fillna(False)
#[m][M+n]+ где n от 1, m from 1
data_cam_filt_rt_iso_df = data_cam_filt_rt_df[~del_isotopes]
In [82]:
plt.gcf().set_size_inches(25,20)
plt.scatter(data_cam_filt_rt_iso_df['rt'], data_cam_filt_rt_iso_df['mz'], s = 50)
Out[82]:
<matplotlib.collections.PathCollection at 0x7f9774b67e10>

PCA

In [83]:
lm1_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'LM1' in col]]
lm2_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'LM2'  in col]]
lm3_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'LM3' in col]]
bf1_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'BF1' in col]]
bf2_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'BF2' in col]]
bf3_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'BF3' in col]]
scat1_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'scat1' in col]]
scat2_samples = data_cam_filt_rt_iso_df[[col for col in samples_cols if 'scat2' in col]]
In [84]:
samples_filled_nan = data_cam_filt_rt_iso_df[samples_cols].fillna(0).transpose().as_matrix()
In [85]:
pca = PCA(2)

transformed_samples = pca.fit_transform(samples_filled_nan)
In [86]:
transformed_samples_df = pd.DataFrame(transformed_samples.T, columns = samples_cols)
In [87]:
plt.gcf().set_size_inches(15,10)
#plt.scatter(transformed_samples[:,0], transformed_samples[:,1], s = 200)
plt.scatter(transformed_samples_df[scat1_samples.columns].loc[0], transformed_samples_df[scat1_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FF00FF',  label="scat1")
plt.scatter(transformed_samples_df[scat2_samples.columns].loc[0], transformed_samples_df[scat2_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='black',  label="scat2")
plt.legend() 
Out[87]:
<matplotlib.legend.Legend at 0x7f9774b14668>
In [88]:
plt.gcf().set_size_inches(15,10)
#plt.scatter(transformed_samples[:,0], transformed_samples[:,1], s = 200)
plt.scatter(transformed_samples_df[lm1_samples.columns].loc[0], transformed_samples_df[lm1_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FF0000',  label="LM1")
plt.scatter(transformed_samples_df[lm2_samples.columns].loc[0], transformed_samples_df[lm2_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FFFF00',  label="LM2")
plt.scatter(transformed_samples_df[lm3_samples.columns].loc[0], transformed_samples_df[lm3_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#00FF00',  label="LM3")
plt.scatter(transformed_samples_df[bf1_samples.columns].loc[0], transformed_samples_df[bf1_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#D6BCC0',  label="bf1")
plt.scatter(transformed_samples_df[bf2_samples.columns].loc[0], transformed_samples_df[bf2_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#00FFFF',  label="bf2")
plt.scatter(transformed_samples_df[bf3_samples.columns].loc[0], transformed_samples_df[bf3_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#0000FF',  label="bf3")
plt.legend() 
Out[88]:
<matplotlib.legend.Legend at 0x7f9774a50e10>
In [89]:
plt.gcf().set_size_inches(15,10)
#plt.scatter(transformed_samples[:,0], transformed_samples[:,1], s = 200)
plt.scatter(transformed_samples_df[lm1_samples.columns].loc[0], transformed_samples_df[lm1_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FF0000',  label="LM1")
plt.scatter(transformed_samples_df[lm2_samples.columns].loc[0], transformed_samples_df[lm2_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FFFF00',  label="LM2")
plt.scatter(transformed_samples_df[lm3_samples.columns].loc[0], transformed_samples_df[lm3_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#00FF00',  label="LM3")
plt.legend() 
Out[89]:
<matplotlib.legend.Legend at 0x7f97749c3a58>
In [90]:
plt.gcf().set_size_inches(20,15)
#plt.scatter(transformed_samples[:,0], transformed_samples[:,1], s = 200)
plt.scatter(transformed_samples_df[lm1_samples.columns].loc[0], transformed_samples_df[lm1_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FF0000',  label="LM1")
plt.scatter(transformed_samples_df[lm2_samples.columns].loc[0], transformed_samples_df[lm2_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FFFF00',  label="LM2")
plt.scatter(transformed_samples_df[lm3_samples.columns].loc[0], transformed_samples_df[lm3_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#00FF00',  label="LM3")
plt.scatter(transformed_samples_df[bf1_samples.columns].loc[0], transformed_samples_df[bf1_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#D6BCC0',  label="bf1")
plt.scatter(transformed_samples_df[bf2_samples.columns].loc[0], transformed_samples_df[bf2_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#00FFFF',  label="bf2")
plt.scatter(transformed_samples_df[bf3_samples.columns].loc[0], transformed_samples_df[bf3_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#0000FF',  label="bf3")
plt.scatter(transformed_samples_df[scat1_samples.columns].loc[0], transformed_samples_df[scat1_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='#FF00FF',  label="scat1")
plt.scatter(transformed_samples_df[scat2_samples.columns].loc[0], transformed_samples_df[scat2_samples.columns].loc[1],
             s = 150, facecolors='none', edgecolors='black',  label="scat2")
plt.legend() 
Out[90]:
<matplotlib.legend.Legend at 0x7f9774947f60>
In [91]:
def add_mz_rt_cols(df):
    return data_cam_filt_rt_iso_df[df.columns.tolist() + ['mz', 'rt']]

lm1_samples_with_mz_rt = add_mz_rt_cols(lm1_samples)

after Anya filtering

In [92]:
filtering_res = np.load('cleanedpeaks.npy')
In [93]:
filtering_res_indices = sorted(set(data_cam_filt_rt_iso_df.index) & set(filtering_res))
In [94]:
data_filtering = data_cam_filt_rt_iso_df.loc[filtering_res_indices]
In [95]:
type(data_filtering)
Out[95]:
pandas.core.frame.DataFrame
In [ ]:
 
In [96]:
plt.gcf().set_size_inches(25,20)
plt.scatter(data_filtering['rt'], data_filtering['mz'])
Out[96]:
<matplotlib.collections.PathCollection at 0x7f97748bfef0>

annotation

In [97]:
annot_data = pd.read_csv('xcms_pigs_camera.csv.ann.txt', sep = ',', index_col=0)
#loading data
In [98]:
lmfa_index = annot_data['lm_id'].str.contains('LMFA0103').fillna(False)
#строки в annot_data которые содержать LMFA0101 + что-то
lmfa_annot_data = annot_data[lmfa_index]
lmfa_annot_data = annot_data[lmfa_index]
#беру из annot_data строки которые соответствуют строкам в которых есть LMFA0101 + что-то
In [99]:
annot_data_no_null = annot_data[~annot_data['lm_id'].isnull()]
In [100]:
annot_groups = []

for index, data in annot_data_no_null.groupby(annot_data_no_null.index):
    annot_groups.append({
        "index": index,
        "lm_id": ";".join(data.lm_id),
        "adduct_annot": ";".join(data.adduct)
    })
    
annot_groups = pd.DataFrame(annot_groups)
annot_groups.set_index('index', inplace=True)
In [101]:
data_cam_filt_rt_iso_df_annot = pd.merge(data_cam_filt_rt_iso_df, annot_groups, left_index=True, right_index=True)
In [102]:
plt.gcf().set_size_inches(25,20)
plt.scatter(data_cam_filt_rt_iso_df_annot['rt'], data_cam_filt_rt_iso_df_annot['mz'])
Out[102]:
<matplotlib.collections.PathCollection at 0x7f9788c6cda0>
In [103]:
lmfa_index3 = annot_data['lm_id'].str.contains('LMFA0103').fillna(False)
#строки в annot_data которые содержать LMFA0101 + что-то
lmfa_annot_data3 = annot_data[lmfa_index3]
lmfa_annot_data3 = annot_data[lmfa_index3]
#беру из annot_data строки которые соответствуют строкам в которых есть LMFA0101 + что-то
In [104]:
lmfa_annot_data3
Out[104]:
lm_id adduct ppm
8 LMFA01030783 M+H -3.781975
23 LMFA01030981 M+H -5.919063
37 LMFA01030240 M+Na 8.363737
37 LMFA01030241 M+Na 8.363737
37 LMFA01030242 M+Na 8.363737
37 LMFA01030464 M+Na 8.363737
118 LMFA01030109 M+Na -3.746604
118 LMFA01030268 M+Na -3.746604
118 LMFA01030269 M+Na -3.746604
118 LMFA01030270 M+Na -3.746604
118 LMFA01030271 M+Na -3.746604
118 LMFA01030272 M+Na -3.746604
118 LMFA01030273 M+Na -3.746604
118 LMFA01030274 M+Na -3.746604
118 LMFA01030275 M+Na -3.746604
118 LMFA01030494 M+Na -3.746604
118 LMFA01030495 M+Na -3.746604
118 LMFA01030496 M+Na -3.746604
118 LMFA01030497 M+Na -3.746604
118 LMFA01030806 M+Na -3.746604
118 LMFA01030807 M+Na -3.746604
118 LMFA01030814 M+Na -3.746604
157 LMFA01030140 M+Na -7.663853
157 LMFA01030141 M+Na -7.663853
157 LMFA01030142 M+Na -7.663853
157 LMFA01030143 M+Na -7.663853
157 LMFA01030144 M+Na -7.663853
157 LMFA01030145 M+Na -7.663853
157 LMFA01030146 M+Na -7.663853
157 LMFA01030147 M+Na -7.663853
... ... ... ...
562 LMFA01030827 M+H -8.882272
562 LMFA01030097 M+Na -3.969940
562 LMFA01030875 M+Na -3.969940
562 LMFA01031013 M+Na -3.969940
581 LMFA01030098 M+H -3.526081
598 LMFA01030889 M+H 4.952384
640 LMFA01030828 M+H -8.466893
640 LMFA01030098 M+Na -3.823263
641 LMFA01030828 M+H -8.163154
641 LMFA01030098 M+Na -3.505568
711 LMFA01030842 M+Na -3.624138
750 LMFA01030836 M+H -8.143040
750 LMFA01030850 M+H -8.143040
751 LMFA01030836 M+H -7.860982
751 LMFA01030850 M+H -7.860982
757 LMFA01030829 M+H -7.950802
758 LMFA01030829 M+H -8.246013
826 LMFA01030836 M+NH4 6.209551
826 LMFA01030850 M+NH4 6.209551
834 LMFA01030829 M+NH4 7.563012
853 LMFA01030836 M+Na -3.628547
853 LMFA01030850 M+Na -3.628547
896 LMFA01030837 M+H -7.976583
896 LMFA01030851 M+H -7.976583
897 LMFA01030837 M+H -7.722373
897 LMFA01030851 M+H -7.722373
911 LMFA01030830 M+H -9.504499
971 LMFA01030844 M+NH4 6.230893
986 LMFA01030837 M+NH4 5.213680
986 LMFA01030851 M+NH4 5.213680

118 rows × 3 columns

In [105]:
data_cam_filt_rt_iso_df_annot.rename(columns={"X171208_pigs_LM3_1_11_1.50_pos": "X171208_pigs_LM3_11_1.50_pos"}, inplace=True)
In [106]:
import re
part_pattern = re.compile(r"_pigs_(\S+?)_")

def get_pig_part(col_name):
    return part_pattern.search(col_name).group(1)
    
    
parts_mapping = {}
for col in data_cam_filt_rt_iso_df_annot.columns:
    if "QC" in col:
        continue
    
    try:
        parts_mapping.setdefault(get_pig_part(col), []).append(col) 
    except:
        pass
In [107]:
for part, columns in parts_mapping.items():

    f, axarr = plt.subplots(len(columns), sharex=True, figsize=(15,20))
    
    for ax, col in zip(axarr, columns):
        ax.scatter(data_cam_filt_rt_iso_df_annot.mz, data_cam_filt_rt_iso_df_annot[col])
        ax.set_title(col)
    
    f.suptitle(part)
    f.subplots_adjust(top=0.95)
    
    plt.show()
In [108]:
pigs_pattern = re.compile(r"_pigs_\S+?_(\S+?)_")


def get_pig_num(col_name):
    return int(pigs_pattern.search(col_name).group(1))
    
    
pigs_mapping = {}
for col in data_cam_filt_rt_iso_df_annot.columns:
    if "QC" in col or "scat1" in col:
        continue
    
    try:
        pigs_mapping.setdefault(get_pig_num(col), []).append(col) 
    except:
        pass
In [109]:
for pig, columns in sorted(pigs_mapping.items()):
    
    columns = sorted(columns)

    f, axarr = plt.subplots(len(columns), sharex=True, figsize=(15,20))
    
    for ax, col in zip(axarr, columns):
        ax.scatter(data_cam_filt_rt_iso_df_annot.mz, data_cam_filt_rt_iso_df_annot[col])
        ax.set_title(col)
    
    f.suptitle("Pig {0}".format(pig))
    f.subplots_adjust(top=0.95)
    
    plt.show()
In [110]:
parts_dfs = {part_name: data_cam_filt_rt_iso_df_annot[cols].T for part_name, cols in parts_mapping.items() }
parts_dfs = list(parts_dfs.items())
In [111]:
from rpy2.robjects import r, pandas2ri

pandas2ri.activate()
from rpy2.robjects.packages import importr

Hotelling = importr('Hotelling')

def hotelling_test(df1, df2):
    df1_r = pandas2ri.py2ri(df1)
    df2_r = pandas2ri.py2ri(df2)
    return Hotelling.hotelling_test(df1_r, df2_r)
    
In [112]:
Hotelling.hotelling_test()
/home/anna/anaconda3/lib/python3.6/site-packages/rpy2/rinterface/__init__.py:145: RRuntimeWarning: Error in nrow(x) : argument "x" is missing, with no default

  warnings.warn(x, RRuntimeWarning)
---------------------------------------------------------------------------
RRuntimeError                             Traceback (most recent call last)
<ipython-input-112-014e8f7bcb62> in <module>()
----> 1 Hotelling.hotelling_test()

~/anaconda3/lib/python3.6/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
    176                 v = kwargs.pop(k)
    177                 kwargs[r_k] = v
--> 178         return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
    179 
    180 pattern_link = re.compile(r'\\link\{(.+?)\}')

~/anaconda3/lib/python3.6/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
    104         for k, v in kwargs.items():
    105             new_kwargs[k] = conversion.py2ri(v)
--> 106         res = super(Function, self).__call__(*new_args, **new_kwargs)
    107         res = conversion.ri2ro(res)
    108         return res

RRuntimeError: Error in nrow(x) : argument "x" is missing, with no default
In [113]:
for i in range(len(parts_dfs)):
    for j in range(i+1, len(parts_dfs)):
        part1, df1 = parts_dfs[i]
        part2, df2 = parts_dfs[j]
        
        test_result = hotelling_test(df1, df2)
        print(test_result)
        break
    break
/home/anna/anaconda3/lib/python3.6/site-packages/rpy2/rinterface/__init__.py:145: RRuntimeWarning: Error in hotelling.stat(x, y, shrinkage) : 
  The sample sizes (nx + ny) must be 1 greater than the number of columns

  warnings.warn(x, RRuntimeWarning)
---------------------------------------------------------------------------
RRuntimeError                             Traceback (most recent call last)
<ipython-input-113-50055bf7f4f8> in <module>()
      4         part2, df2 = parts_dfs[j]
      5 
----> 6         test_result = hotelling_test(df1, df2)
      7         print(test_result)
      8         break

<ipython-input-111-f4ad68cabc80> in hotelling_test(df1, df2)
      9     df1_r = pandas2ri.py2ri(df1)
     10     df2_r = pandas2ri.py2ri(df2)
---> 11     return Hotelling.hotelling_test(df1_r, df2_r)
     12 

~/anaconda3/lib/python3.6/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
    176                 v = kwargs.pop(k)
    177                 kwargs[r_k] = v
--> 178         return super(SignatureTranslatedFunction, self).__call__(*args, **kwargs)
    179 
    180 pattern_link = re.compile(r'\\link\{(.+?)\}')

~/anaconda3/lib/python3.6/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
    104         for k, v in kwargs.items():
    105             new_kwargs[k] = conversion.py2ri(v)
--> 106         res = super(Function, self).__call__(*new_args, **new_kwargs)
    107         res = conversion.ri2ro(res)
    108         return res

RRuntimeError: Error in hotelling.stat(x, y, shrinkage) : 
  The sample sizes (nx + ny) must be 1 greater than the number of columns
In [114]:
samples = data_cam_filt_rt_iso_df_annot[[col for col in data_cam_filt_rt_iso_df_annot.columns if col.startswith("X") and "QC" not in col]]
In [115]:
samples.columns
Out[115]:
Index(['X171206_pigs_scat1_3_15_pos_1.100', 'X171207_pigs_BF1_10_1.50_pos',
       'X171207_pigs_BF1_11_1.50_pos', 'X171207_pigs_BF1_15_1.50_pos',
       'X171207_pigs_BF1_2_1.50_pos', 'X171207_pigs_BF1_3_1.50_pos',
       'X171207_pigs_BF1_6_1.50_pos', 'X171207_pigs_BF1_9_1.50_pos',
       'X171207_pigs_BF2_10_1.50_pos', 'X171207_pigs_BF2_11_1.50_pos',
       'X171207_pigs_BF2_15_1.50_pos', 'X171207_pigs_BF2_18_1.50_pos',
       'X171207_pigs_BF2_2_1.50_pos', 'X171207_pigs_BF2_3_1.50_pos',
       'X171207_pigs_BF2_6_1.50_pos', 'X171207_pigs_BF2_8_1.50_pos',
       'X171207_pigs_BF2_9_1.50_pos', 'X171207_pigs_BF3_10_1.50_pos',
       'X171207_pigs_BF3_11_1.50_pos', 'X171207_pigs_BF3_18_1.50_pos',
       'X171207_pigs_BF3_2_1.50_pos', 'X171207_pigs_BF3_3_1.50_pos',
       'X171207_pigs_BF3_6_1.50_pos', 'X171207_pigs_BF3_8_1.50_pos',
       'X171207_pigs_BF3_9_1.50_pos', 'X171207_pigs_LM1_10_1.50_pos',
       'X171207_pigs_LM1_11_1.50_pos', 'X171207_pigs_LM1_18_1.50_pos',
       'X171207_pigs_LM1_2_1.50_pos', 'X171207_pigs_LM1_3_1.50_pos',
       'X171207_pigs_LM1_6_1.50_pos', 'X171207_pigs_LM1_8_1.50_pos',
       'X171207_pigs_LM1_9_1.50_pos', 'X171207_pigs_LM2_10_1.50_pos',
       'X171207_pigs_LM2_11_1.50_pos', 'X171207_pigs_LM2_15_1.50_pos',
       'X171207_pigs_LM2_18_1.50_pos', 'X171207_pigs_LM2_2_1.50_pos',
       'X171207_pigs_LM2_3_1.50_pos', 'X171207_pigs_LM2_6_1.50_pos',
       'X171207_pigs_LM2_9_1.50_pos', 'X171207_pigs_LM3_10_1.50_pos',
       'X171207_pigs_LM3_18_1.50_pos', 'X171207_pigs_LM3_2_1.50_pos',
       'X171207_pigs_LM3_3_1.50_pos', 'X171207_pigs_LM3_6_1.50_pos',
       'X171207_pigs_LM3_8_1.50_pos', 'X171207_pigs_LM3_9_1.50_pos',
       'X171208_pigs_BF1_18_1.50_pos', 'X171208_pigs_BF1_8_1.50_pos',
       'X171208_pigs_LM1_15_1.50_pos', 'X171208_pigs_LM2_8_1.50_pos',
       'X171208_pigs_LM3_11_1.50_pos', 'X171208_pigs_scat1_1_15_1.100_pos',
       'X171208_pigs_scat1_10_1.100_pos', 'X171208_pigs_scat1_11_1.100_pos',
       'X171208_pigs_scat1_18_1.100_pos', 'X171208_pigs_scat1_2_1.100_pos',
       'X171208_pigs_scat1_2_15_1.100_pos', 'X171208_pigs_scat1_3_1.100_pos',
       'X171208_pigs_scat1_6_1.100_pos', 'X171208_pigs_scat1_8_1.100_pos',
       'X171208_pigs_scat1_9_1.100_pos', 'X171208_pigs_scat2_10_1.100_pos',
       'X171208_pigs_scat2_11_1.100_pos', 'X171208_pigs_scat2_15_1.100_pos',
       'X171208_pigs_scat2_18_1.100_pos', 'X171208_pigs_scat2_2_1.100_pos',
       'X171208_pigs_scat2_3_1.100_pos', 'X171208_pigs_scat2_6_1.100_pos',
       'X171208_pigs_scat2_8_1.100_pos', 'X171208_pigs_scat2_9_1.100_pos'],
      dtype='object')
In [116]:
del samples['X171206_pigs_scat1_3_15_pos_1.100']
del samples['X171208_pigs_scat1_2_15_1.100_pos']
del samples['X171208_pigs_scat1_1_15_1.100_pos']
In [130]:
import seaborn

plt.gcf().set_size_inches(30, 30)

seaborn.heatmap(samples.corr(), cmap="BuPu")
Out[130]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f976bcd2978>
In [118]:
samples.corr()
Out[118]:
X171207_pigs_BF1_10_1.50_pos X171207_pigs_BF1_11_1.50_pos X171207_pigs_BF1_15_1.50_pos X171207_pigs_BF1_2_1.50_pos X171207_pigs_BF1_3_1.50_pos X171207_pigs_BF1_6_1.50_pos X171207_pigs_BF1_9_1.50_pos X171207_pigs_BF2_10_1.50_pos X171207_pigs_BF2_11_1.50_pos X171207_pigs_BF2_15_1.50_pos ... X171208_pigs_scat1_9_1.100_pos X171208_pigs_scat2_10_1.100_pos X171208_pigs_scat2_11_1.100_pos X171208_pigs_scat2_15_1.100_pos X171208_pigs_scat2_18_1.100_pos X171208_pigs_scat2_2_1.100_pos X171208_pigs_scat2_3_1.100_pos X171208_pigs_scat2_6_1.100_pos X171208_pigs_scat2_8_1.100_pos X171208_pigs_scat2_9_1.100_pos
X171207_pigs_BF1_10_1.50_pos 1.000000 0.952676 0.929048 0.947895 0.954618 0.977060 0.807723 0.957207 0.967452 0.938266 ... 0.663466 0.613097 0.616398 0.699769 0.693704 0.661164 0.653195 0.702394 0.666744 0.609580
X171207_pigs_BF1_11_1.50_pos 0.952676 1.000000 0.940916 0.958649 0.944520 0.959794 0.845386 0.955763 0.964474 0.947675 ... 0.666710 0.621642 0.638461 0.680199 0.684007 0.648964 0.667164 0.688275 0.651494 0.616932
X171207_pigs_BF1_15_1.50_pos 0.929048 0.940916 1.000000 0.946717 0.933207 0.947778 0.919069 0.917622 0.955230 0.916719 ... 0.795161 0.750531 0.763830 0.817512 0.813470 0.782859 0.782871 0.816300 0.784726 0.750010
X171207_pigs_BF1_2_1.50_pos 0.947895 0.958649 0.946717 1.000000 0.947165 0.952617 0.858592 0.948949 0.949042 0.943564 ... 0.698739 0.660525 0.667846 0.713032 0.716613 0.679736 0.696426 0.719277 0.682727 0.652503
X171207_pigs_BF1_3_1.50_pos 0.954618 0.944520 0.933207 0.947165 1.000000 0.965772 0.839962 0.953786 0.962842 0.967188 ... 0.711236 0.679845 0.680427 0.741346 0.738151 0.707391 0.704832 0.742048 0.709663 0.674711
X171207_pigs_BF1_6_1.50_pos 0.977060 0.959794 0.947778 0.952617 0.965772 1.000000 0.847298 0.958190 0.980515 0.952061 ... 0.693073 0.649620 0.658353 0.721095 0.723034 0.687175 0.678868 0.722682 0.695254 0.642514
X171207_pigs_BF1_9_1.50_pos 0.807723 0.845386 0.919069 0.858592 0.839962 0.847298 1.000000 0.810759 0.863282 0.809500 ... 0.822221 0.773047 0.811354 0.834332 0.844415 0.812999 0.821724 0.842613 0.811470 0.779551
X171207_pigs_BF2_10_1.50_pos 0.957207 0.955763 0.917622 0.948949 0.953786 0.958190 0.810759 1.000000 0.945437 0.975207 ... 0.679465 0.635173 0.639172 0.696391 0.698155 0.656037 0.678949 0.701784 0.658542 0.628975
X171207_pigs_BF2_11_1.50_pos 0.967452 0.964474 0.955230 0.949042 0.962842 0.980515 0.863282 0.945437 1.000000 0.941598 ... 0.708523 0.674469 0.682948 0.742731 0.740601 0.720045 0.702361 0.747594 0.718373 0.669334
X171207_pigs_BF2_15_1.50_pos 0.938266 0.947675 0.916719 0.943564 0.967188 0.952061 0.809500 0.975207 0.941598 1.000000 ... 0.694795 0.657911 0.660733 0.713085 0.710484 0.675790 0.696524 0.717557 0.674423 0.654510
X171207_pigs_BF2_18_1.50_pos 0.968915 0.956604 0.945143 0.956175 0.959912 0.970803 0.846110 0.944677 0.968102 0.937330 ... 0.675441 0.630920 0.640060 0.712220 0.705467 0.675703 0.664442 0.716147 0.671638 0.624924
X171207_pigs_BF2_2_1.50_pos 0.964296 0.960572 0.938414 0.945522 0.962238 0.974156 0.828105 0.953041 0.971337 0.945583 ... 0.692401 0.655405 0.658901 0.723409 0.718160 0.688379 0.676623 0.717394 0.694686 0.646477
X171207_pigs_BF2_3_1.50_pos 0.976915 0.956235 0.949610 0.956875 0.961121 0.973358 0.833700 0.958172 0.965380 0.952793 ... 0.701507 0.653229 0.658557 0.732848 0.731898 0.694534 0.690933 0.729114 0.701435 0.647121
X171207_pigs_BF2_6_1.50_pos 0.970011 0.957703 0.936854 0.951544 0.968945 0.979841 0.826337 0.952841 0.971619 0.946382 ... 0.687408 0.658682 0.658074 0.710533 0.710047 0.682078 0.671217 0.714330 0.687916 0.646850
X171207_pigs_BF2_8_1.50_pos 0.969562 0.965536 0.937218 0.961998 0.962936 0.973039 0.823544 0.962738 0.957556 0.962165 ... 0.681309 0.632713 0.636677 0.693326 0.693320 0.648566 0.662054 0.691307 0.661147 0.630183
X171207_pigs_BF2_9_1.50_pos 0.946071 0.943678 0.930485 0.937873 0.947739 0.960427 0.844083 0.953355 0.955447 0.949614 ... 0.686369 0.634245 0.653203 0.718047 0.714355 0.693303 0.684999 0.725324 0.680536 0.632087
X171207_pigs_BF3_10_1.50_pos 0.872141 0.889474 0.942130 0.900890 0.865873 0.894765 0.954815 0.854691 0.897248 0.835172 ... 0.773350 0.722778 0.753150 0.797317 0.804976 0.768229 0.767431 0.802683 0.771485 0.719924
X171207_pigs_BF3_11_1.50_pos 0.943879 0.951312 0.944357 0.949147 0.943923 0.963417 0.891160 0.925238 0.964456 0.915769 ... 0.704535 0.664746 0.683152 0.732583 0.736856 0.710979 0.695631 0.739262 0.711205 0.656535
X171207_pigs_BF3_18_1.50_pos 0.862222 0.853582 0.885658 0.857317 0.894502 0.874214 0.790463 0.871907 0.885556 0.880083 ... 0.829978 0.816449 0.803749 0.823075 0.826903 0.809039 0.812266 0.826533 0.821986 0.806323
X171207_pigs_BF3_2_1.50_pos 0.883268 0.901995 0.925279 0.903636 0.878344 0.906585 0.922780 0.855379 0.902511 0.845492 ... 0.690659 0.631876 0.668583 0.726072 0.730719 0.692085 0.695384 0.732369 0.692279 0.634213
X171207_pigs_BF3_3_1.50_pos 0.977000 0.953804 0.947386 0.954837 0.966116 0.978291 0.829764 0.956292 0.970917 0.951661 ... 0.693922 0.648633 0.654763 0.726319 0.725385 0.692091 0.684549 0.725601 0.696783 0.643706
X171207_pigs_BF3_6_1.50_pos 0.909264 0.922073 0.962094 0.926802 0.908449 0.936284 0.945771 0.888979 0.937537 0.878717 ... 0.770758 0.720088 0.750382 0.792952 0.800542 0.766701 0.761162 0.799480 0.771275 0.721952
X171207_pigs_BF3_8_1.50_pos 0.956262 0.961217 0.929180 0.957758 0.948790 0.960382 0.825569 0.963775 0.945666 0.954434 ... 0.671332 0.619145 0.633586 0.681543 0.682020 0.639231 0.658808 0.684818 0.647546 0.619838
X171207_pigs_BF3_9_1.50_pos 0.905846 0.942609 0.906695 0.938375 0.903154 0.922961 0.856184 0.913493 0.915568 0.901201 ... 0.612758 0.560373 0.591103 0.633366 0.639847 0.595421 0.623118 0.645969 0.594135 0.558151
X171207_pigs_LM1_10_1.50_pos 0.943479 0.947472 0.953724 0.937690 0.929336 0.953109 0.906533 0.915804 0.957531 0.901185 ... 0.721629 0.677076 0.699649 0.763781 0.762365 0.738126 0.724555 0.769151 0.735892 0.670826
X171207_pigs_LM1_11_1.50_pos 0.975456 0.955303 0.955217 0.947874 0.957236 0.977964 0.849581 0.945821 0.974102 0.939947 ... 0.725912 0.679110 0.683749 0.755569 0.751153 0.721797 0.710862 0.757827 0.725787 0.674255
X171207_pigs_LM1_18_1.50_pos 0.947135 0.968833 0.946900 0.959800 0.946918 0.960035 0.878545 0.944255 0.961934 0.940844 ... 0.697591 0.659825 0.678813 0.724958 0.723468 0.695767 0.708819 0.733623 0.693591 0.656612
X171207_pigs_LM1_2_1.50_pos 0.952040 0.930667 0.951739 0.936912 0.958059 0.957704 0.848189 0.937024 0.957529 0.939905 ... 0.784399 0.751056 0.748316 0.805676 0.808359 0.777448 0.772324 0.807926 0.786558 0.747498
X171207_pigs_LM1_3_1.50_pos 0.974152 0.956023 0.937474 0.955471 0.964674 0.975687 0.834858 0.948098 0.963172 0.944467 ... 0.692121 0.644518 0.652344 0.724237 0.727657 0.687964 0.687350 0.726779 0.697177 0.641064
X171207_pigs_LM1_6_1.50_pos 0.973966 0.943169 0.931009 0.942071 0.952317 0.972208 0.811590 0.938305 0.964867 0.925317 ... 0.667467 0.640220 0.632509 0.711586 0.706202 0.678173 0.665426 0.709740 0.685364 0.623973
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
X171207_pigs_LM2_9_1.50_pos 0.945094 0.940714 0.934935 0.932746 0.926853 0.962057 0.873879 0.910150 0.957717 0.896609 ... 0.659346 0.615647 0.639605 0.696966 0.701462 0.675068 0.653959 0.708837 0.674847 0.609356
X171207_pigs_LM3_10_1.50_pos 0.874401 0.862132 0.853875 0.842796 0.901442 0.895927 0.781716 0.899955 0.886081 0.905053 ... 0.673531 0.650001 0.638618 0.709972 0.715837 0.683180 0.683464 0.718641 0.677979 0.634852
X171207_pigs_LM3_18_1.50_pos 0.960002 0.953089 0.960706 0.956098 0.953473 0.974861 0.875937 0.937538 0.972272 0.936465 ... 0.728775 0.690893 0.697737 0.764947 0.759759 0.732270 0.724406 0.767348 0.732701 0.681225
X171207_pigs_LM3_2_1.50_pos 0.961018 0.942704 0.935861 0.937059 0.952305 0.972043 0.856089 0.920997 0.969637 0.917740 ... 0.674929 0.634563 0.646825 0.719733 0.715942 0.690837 0.670641 0.720632 0.690668 0.629100
X171207_pigs_LM3_3_1.50_pos 0.955502 0.949666 0.944158 0.947036 0.951768 0.966632 0.879265 0.932152 0.962378 0.927201 ... 0.711451 0.672251 0.685661 0.747578 0.750947 0.723179 0.711100 0.754220 0.724850 0.664907
X171207_pigs_LM3_6_1.50_pos 0.941945 0.952392 0.953500 0.942311 0.925022 0.963630 0.884161 0.916472 0.963212 0.900804 ... 0.710538 0.675107 0.687886 0.736765 0.743674 0.712043 0.701784 0.742155 0.721276 0.662369
X171207_pigs_LM3_8_1.50_pos 0.945677 0.933358 0.941543 0.936626 0.969591 0.960734 0.864163 0.936479 0.961955 0.944624 ... 0.764618 0.744612 0.739596 0.794234 0.798641 0.774108 0.758915 0.798757 0.779674 0.731973
X171207_pigs_LM3_9_1.50_pos 0.894431 0.917970 0.929066 0.907122 0.888621 0.919810 0.906263 0.867721 0.924829 0.855864 ... 0.681353 0.636329 0.664950 0.722718 0.722857 0.694342 0.684529 0.726389 0.693828 0.630369
X171208_pigs_BF1_18_1.50_pos 0.940337 0.950485 0.941193 0.952058 0.967016 0.962117 0.851146 0.964242 0.954541 0.973375 ... 0.746041 0.710955 0.714854 0.753226 0.757235 0.720170 0.736962 0.756882 0.727223 0.709335
X171208_pigs_BF1_8_1.50_pos 0.951560 0.958758 0.936769 0.958105 0.963432 0.965190 0.833463 0.967712 0.954835 0.968539 ... 0.708211 0.675171 0.679182 0.721135 0.719177 0.682867 0.699650 0.721817 0.689867 0.669707
X171208_pigs_LM1_15_1.50_pos 0.914043 0.940347 0.905658 0.940786 0.937473 0.930149 0.836103 0.942362 0.929584 0.954333 ... 0.654021 0.620576 0.635307 0.687718 0.684852 0.655275 0.677068 0.699841 0.644681 0.616279
X171208_pigs_LM2_8_1.50_pos 0.939421 0.965689 0.934249 0.948692 0.951259 0.954084 0.861524 0.955061 0.957618 0.955641 ... 0.697877 0.668392 0.681582 0.723623 0.720694 0.694356 0.708109 0.730572 0.691914 0.664096
X171208_pigs_LM3_11_1.50_pos 0.899186 0.933073 0.907702 0.925499 0.892432 0.919842 0.870974 0.903651 0.921175 0.891459 ... 0.628886 0.581135 0.613182 0.665106 0.666813 0.634630 0.647218 0.674946 0.626192 0.579363
X171208_pigs_scat1_10_1.100_pos 0.667958 0.661090 0.792692 0.690351 0.722373 0.697177 0.827689 0.670878 0.720526 0.686847 ... 0.962480 0.957543 0.956975 0.978071 0.980676 0.973844 0.958061 0.976371 0.980574 0.957112
X171208_pigs_scat1_11_1.100_pos 0.622718 0.623972 0.767075 0.654256 0.681305 0.653726 0.825542 0.626736 0.686651 0.653817 ... 0.954183 0.944994 0.954163 0.967636 0.966845 0.970166 0.948745 0.969931 0.966009 0.952800
X171208_pigs_scat1_18_1.100_pos 0.702152 0.713187 0.829681 0.752890 0.755884 0.730347 0.855126 0.726468 0.750256 0.746849 ... 0.972400 0.945554 0.961988 0.956424 0.961111 0.948686 0.969376 0.957181 0.946526 0.957074
X171208_pigs_scat1_2_1.100_pos 0.639808 0.630143 0.775264 0.669603 0.688218 0.665376 0.819053 0.639249 0.692864 0.662351 ... 0.958857 0.939634 0.950358 0.973447 0.978536 0.976960 0.959983 0.973713 0.973274 0.949023
X171208_pigs_scat1_3_1.100_pos 0.612060 0.630820 0.762609 0.668230 0.667098 0.645026 0.805381 0.638804 0.671981 0.656123 ... 0.973592 0.961049 0.974153 0.944210 0.953490 0.949918 0.973371 0.944046 0.950559 0.972811
X171208_pigs_scat1_6_1.100_pos 0.670116 0.656343 0.795765 0.701703 0.705016 0.691292 0.828845 0.664668 0.717081 0.673119 ... 0.960656 0.949760 0.951356 0.969541 0.977296 0.973156 0.958926 0.973672 0.975153 0.950661
X171208_pigs_scat1_8_1.100_pos 0.650631 0.631272 0.762476 0.664792 0.712874 0.679241 0.783829 0.651246 0.704992 0.675662 ... 0.954742 0.964119 0.951438 0.964982 0.968697 0.974314 0.942024 0.966350 0.981787 0.960594
X171208_pigs_scat1_9_1.100_pos 0.663466 0.666710 0.795161 0.698739 0.711236 0.693073 0.822221 0.679465 0.708523 0.694795 ... 1.000000 0.963471 0.972855 0.960514 0.967857 0.953558 0.972505 0.958265 0.961019 0.977094
X171208_pigs_scat2_10_1.100_pos 0.613097 0.621642 0.750531 0.660525 0.679845 0.649620 0.773047 0.635173 0.674469 0.657911 ... 0.963471 1.000000 0.971444 0.939971 0.949860 0.951463 0.955744 0.944525 0.962729 0.978098
X171208_pigs_scat2_11_1.100_pos 0.616398 0.638461 0.763830 0.667846 0.680427 0.658353 0.811354 0.639172 0.682948 0.660733 ... 0.972855 0.971444 1.000000 0.943559 0.952420 0.960521 0.968451 0.947289 0.958759 0.979078
X171208_pigs_scat2_15_1.100_pos 0.699769 0.680199 0.817512 0.713032 0.741346 0.721095 0.834332 0.696391 0.742731 0.713085 ... 0.960514 0.939971 0.943559 1.000000 0.979164 0.967900 0.956549 0.980240 0.971217 0.943694
X171208_pigs_scat2_18_1.100_pos 0.693704 0.684007 0.813470 0.716613 0.738151 0.723034 0.844415 0.698155 0.740601 0.710484 ... 0.967857 0.949860 0.952420 0.979164 1.000000 0.972062 0.962801 0.979459 0.981764 0.948702
X171208_pigs_scat2_2_1.100_pos 0.661164 0.648964 0.782859 0.679736 0.707391 0.687175 0.812999 0.656037 0.720045 0.675790 ... 0.953558 0.951463 0.960521 0.967900 0.972062 1.000000 0.954759 0.975520 0.983055 0.955719
X171208_pigs_scat2_3_1.100_pos 0.653195 0.667164 0.782871 0.696426 0.704832 0.678868 0.821724 0.678949 0.702361 0.696524 ... 0.972505 0.955744 0.968451 0.956549 0.962801 0.954759 1.000000 0.960315 0.950188 0.964033
X171208_pigs_scat2_6_1.100_pos 0.702394 0.688275 0.816300 0.719277 0.742048 0.722682 0.842613 0.701784 0.747594 0.717557 ... 0.958265 0.944525 0.947289 0.980240 0.979459 0.975520 0.960315 1.000000 0.975587 0.946453
X171208_pigs_scat2_8_1.100_pos 0.666744 0.651494 0.784726 0.682727 0.709663 0.695254 0.811470 0.658542 0.718373 0.674423 ... 0.961019 0.962729 0.958759 0.971217 0.981764 0.983055 0.950188 0.975587 1.000000 0.958306
X171208_pigs_scat2_9_1.100_pos 0.609580 0.616932 0.750010 0.652503 0.674711 0.642514 0.779551 0.628975 0.669334 0.654510 ... 0.977094 0.978098 0.979078 0.943694 0.948702 0.955719 0.964033 0.946453 0.958306 1.000000

69 rows × 69 columns

In [119]:
def pigs_vectors(variables, dependents):
    X = []
    Y = []
    
    
    for pig, cols in pigs_mapping.items():
        if pig == 15:
            continue
        
        x = []
        y = []
        
        for var_name in variables:
            for col in cols:
                if var_name in col:
                    x.append(data_cam_filt_rt_iso_df_annot[col])
                             
        for dep_name in dependents:
            for col in cols:
                if dep_name in col:
                    y.append(data_cam_filt_rt_iso_df_annot[col])
                    
        
        X.append(np.concatenate(x))
        Y.append(np.concatenate(y))
        
    return np.array(X), np.array(Y)
In [120]:
X, Y = pigs_vectors(["BF1", "BF2", "BF3", "LM1", "LM2", "LM3"], ["scat2"])
In [121]:
X.shape
Out[121]:
(8, 5064)
In [122]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import LeaveOneOut
In [123]:
loo = LeaveOneOut()
pigs_mse = []

for train_index, test_index in loo.split(X):
    X_train, X_test = X[train_index], X[test_index]
    Y_train, Y_test = Y[train_index], Y[test_index]
    
    linreg = Ridge(alpha=1e6)
    linreg.fit(X_train, Y_train)
    square_errors =np.abs(Y_test - linreg.predict(X_test))
    mse = np.mean(square_errors)
    pigs_mse.append(mse)
    print("MSE", mse)
    plt.plot(Y_test.ravel(), label='True', c='b')
    plt.plot(linreg.predict(X_test).ravel(), label='Prediction', alpha=0.5, c='r')
    plt.legend()
    plt.show()

    
MSE 0.3015264333521279
MSE 0.22965618657839235
MSE 0.21672220819993834
MSE 0.2618086262098686
MSE 0.24181026846957154
MSE 0.27319800363207486
MSE 0.2468612927181379
MSE 0.20550821279283482
In [124]:
print("Average MSE", np.mean(pigs_mse))
print("Std MSE", np.std(pigs_mse))
Average MSE 0.2471364039941183
Std MSE 0.029250400401620642
In [125]:
parts_mse = []

for part1, part2 in zip(*pigs_vectors(["BF1"], ["BF2"])):
    square_errors =np.abs(part1.ravel() - part2.ravel())
    mse = np.mean(square_errors)
    parts_mse.append(mse)
    
print("MSE", np.mean(parts_mse))
MSE 0.4090785531590788
In [126]:
for bf, scat in zip(*pigs_vectors(["BF1"], ["scat2"])):
    plt.plot(bf.ravel(), label='BF1')
    plt.plot(scat.ravel(), label='scat2')
    plt.legend()
    plt.show()
In [127]:
data_cam_filt_rt_iso_df_annot.iloc[0]['lm_id']
Out[127]:
'LMFA11000592;LMFA11000636;LMFA11000637;LMFA11000638;LMFA11000659'

scat1

In [128]:
X, Y = pigs_vectors(["BF1", "BF2", "BF3", "LM1", "LM2", "LM3"], ["scat1"])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-128-3ce08417cdce> in <module>()
----> 1 X, Y = pigs_vectors(["BF1", "BF2", "BF3", "LM1", "LM2", "LM3"], ["scat1"])

<ipython-input-119-cb28485457da> in pigs_vectors(variables, dependents)
     23 
     24         X.append(np.concatenate(x))
---> 25         Y.append(np.concatenate(y))
     26 
     27     return np.array(X), np.array(Y)

ValueError: need at least one array to concatenate

MDS

In [129]:
print(__doc__)
import numpy as np

from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection

from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA
Automatically created module for IPython interactive environment
In [ ]:
data_cam_filt_rt_iso_df_annot.drop(["X171208_pigs_scat1_1_15_1.100_pos", "X171208_pigs_scat1_2_15_1.100_pos"], axis = 1, inplace = True)
data_cam_filt_rt_iso_df_annot.rename(columns={"X171208_pigs_LM3_1_11_1.50_pos": "X171208_pigs_LM3_11_1.50_pos",
                                             "X171206_pigs_scat1_3_15_pos_1.100": "X171208_pigs_scat1_15_1.100_pos"}, inplace=True)
In [ ]:
all_columns = data_cam_filt_rt_iso_df_annot.columns.tolist()
samples_columns = all_columns[all_columns.index('X171208_pigs_scat1_15_1.100_pos')
                              :all_columns.index('X171208_pigs_scat2_9_1.100_pos')+1]
In [ ]:
import numpy as np
import pandas as pd
from sklearn import manifold
from sklearn.metrics import euclidean_distances

seed = np.random.RandomState(seed=3)
data = pd.read_csv('data/big-file.csv')

#  start small dont take all the data, 
#  its about 200k records
subset = data[:10000]
similarities = euclidean_distances(subset)

mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, 
      random_state=seed, dissimilarity="precomputed", n_jobs=1)

pos = mds.fit(similarities).embedding_
In [ ]:
 
In [ ]:
 
In [ ]: